******** UPDATES ***************************************************

12/13/07: CHANGED FROM GZIP MACRO TO USING GZIP COMMAND.
8/6/08: CHANGED BACK TO GZIP MACRO. 
*****************************************************************;

******** NOTE ***************************************************
I add 9000000 to newid for 84-85 data because newid restarted at one
in 1986, which caused repeated IDs when merging 84-85 with later
years. In this program the same offset is applied to every file read
(the 80-81 family files and the 84 home files)
*****************************************************************;

*%INCLUDE "~/jim/gzipseq.mcr";
%include "~/jim/poverty/include/infile_cons.mcr";
LIBNAME alldat '~/jim/data';

*******  FAMILY FILES   **********;

/* Every fileref below is a PIPE device: SAS runs the quoted gunzip -c
   command and reads the decompressed text from its standard output, so
   the .gz files are never unpacked on disk. */

/* fmly801-fmly804: consumed by the read macro, which builds the
   fileref name as fmly followed by the quarter code. */
filename fmly801 pipe 'gunzip -c ~/jim/data/ce8081/08423-0001-Data.txt.gz';
filename fmly802 pipe 'gunzip -c ~/jim/data/ce8081/08423-0005-Data.txt.gz';
filename fmly803 pipe 'gunzip -c ~/jim/data/ce8081/08423-0009-Data.txt.gz';
filename fmly804 pipe 'gunzip -c ~/jim/data/ce8081/08423-0013-Data.txt.gz';

/* fmly811-fmly815: same usage, five files for the 81x quarter codes. */
filename fmly811 pipe 'gunzip -c ~/jim/data/ce8081/08423-0017-Data.txt.gz';
filename fmly812 pipe 'gunzip -c ~/jim/data/ce8081/08423-0021-Data.txt.gz';
filename fmly813 pipe 'gunzip -c ~/jim/data/ce8081/08423-0025-Data.txt.gz';
filename fmly814 pipe 'gunzip -c ~/jim/data/ce8081/08423-0029-Data.txt.gz';
filename fmly815 pipe 'gunzip -c ~/jim/data/ce8081/08423-0033-Data.txt.gz';

/* home841-home845: read by the hm841-hm845 DATA steps further down,
   which take newid, ucc and propvalu from fixed columns. */
filename home841 pipe 'gunzip -c ~/jim/data/ce_mtab/1984/08671-0003-Data.txt.gz';
filename home842 pipe 'gunzip -c ~/jim/data/ce_mtab/1984/08671-0007-Data.txt.gz';
filename home843 pipe 'gunzip -c ~/jim/data/ce_mtab/1984/08671-0011-Data.txt.gz';
filename home844 pipe 'gunzip -c ~/jim/data/ce_mtab/1984/08671-0015-Data.txt.gz';
filename home845 pipe 'gunzip -c ~/jim/data/ce_mtab/1984/08671-0019-Data.txt.gz';


%MACRO read(qyr,statement);
  /*----------------------------------------------------------------
    Read one quarterly family file into WORK.famly<qyr> and print its
    summary statistics.

    qyr       - numeric quarter code (e.g. 801). It is used as the
                suffix of both the input fileref (fmly<qyr>) and the
                output data set (famly<qyr>), and stored as qyear.
    statement - text placed verbatim after the INPUT keyword.

    Variables added to each record:
      ex_yr    - 1 when qyr is one of 815, 855, 955, 1045, else 0
      srv_year - qyr with its last digit dropped, plus ex_yr
      qyear    - qyr
      newid    - the value read from the file plus 9000000
  ----------------------------------------------------------------*/

  DATA famly&qyr;
    INFILE fmly&qyr LRECL=3400;
    INPUT &statement;

    /* A comparison evaluates to 1 or 0, so no IF/ELSE is needed. */
    ex_yr    = (&qyr IN (815,855,955,1045));
    srv_year = FLOOR(&qyr/10) + ex_yr;
    qyear    = &qyr;
    newid    = newid + 9000000;
  RUN;

  /* Summary statistics for the data set just created. */
  PROC MEANS DATA=famly&qyr;
  RUN;

%MEND read;

/* 1980 quarter files */
%read(801,&all8081);
%read(802,&all8081);
%read(803,&all8081);
%read(804,&all8081);

/* 1981 quarter files, including the 815 file */
%read(811,&all8081);
%read(812,&all8081);
%read(813,&all8081);
%read(814,&all8081);
%read(815,&all8081);


***************  84:1 THROUGH 84:5  ************************************;

%MACRO read_hm84(q);
  /*----------------------------------------------------------------
    Read one 1984 home file into WORK.hm84<q>.

    q - single digit 1-5. It selects the input fileref home84<q> and
        names the output data set hm84<q>.

    Only records whose ucc equals 800721 are kept. newid gets the
    same 9000000 offset that the family files receive, so the ids
    line up when the data sets are merged by newid later on.

    This macro replaces five copy-pasted DATA steps that differed
    only in the quarter digit. The data sets produced are the same.
  ----------------------------------------------------------------*/
  DATA hm84&q;
    INFILE home84&q;
    INPUT newid 1-8 ucc 9-14 propvalu 15-26;
    IF ucc=800721;

    newid=9000000+newid;
  RUN;
%MEND read_hm84;

%read_hm84(1);
%read_hm84(2);
%read_hm84(3);
%read_hm84(4);
%read_hm84(5);


*******************************************************************
STACK THE 84 HOME RECORDS AND TOTAL THEM PER NEWID
(INPUT TO THE RENTEQ IMPUTATION FOR 80-81)
*******************************************************************;
DATA home;
   SET hm841 hm842 hm843 hm844 hm845;

   /* One counter per record. Its per-newid sum (n_hous) is the number
      of home records found for that newid. */
   numhous  = 1;

   /* Original author note: prop values are at monthly level, so
      multiply by 4 and sum across the 3 months. */
   propvalu = 4 * propvalu;
RUN;

PROC SORT DATA=home;
   BY newid;
RUN;

/* PROC SUMMARY is PROC MEANS with no printed output. out_h has one row
   per newid holding n_hous (sum of numhous) and totpval (sum of the
   scaled propvalu). */
PROC SUMMARY DATA=home;
   BY newid;
   VAR numhous propvalu;
   OUTPUT OUT=out_h(DROP=_TYPE_ _FREQ_) SUM=n_hous totpval;
RUN;


/* Combine the nine quarterly family data sets into cexdat, matching on
   newid. When a newid occurs in more than one input, values from the
   data set listed later overwrite earlier ones.
   NOTE(review): a BY-merge needs every input in ascending newid order
   and no sort of the famly data sets is visible in this file. This
   presumably relies on the raw files already being in that order -
   TODO confirm. */
DATA cexdat;
   MERGE famly801
         famly802
         famly803
         famly804
         famly811
         famly812
         famly813
         famly814
         famly815;
   BY newid;
RUN;

/* Drop the intermediates. out_h and cexdat are kept for later steps. */
PROC DATASETS LIBRARY=work;
   DELETE famly801 famly802 famly803 famly804
          famly811 famly812 famly813 famly814 famly815
          hm841 hm842 hm843 hm844 hm845
          home;
RUN;
QUIT;


/*------------------------------------------------------------------
  famstep8081: derived analysis variables for the 80-81 family
  records in cexdat. propvalu is renamed to totpval on the way in.

  Created here:
    lft5, rt1            newid without its last digit, and that digit
    swnk, sm, mm         0/1 family-type indicators
    rent                 sum of rendwepq and rendwecq
    urban, smsa          2 minus bls_urbn / 2 minus smsastat
    reg1-reg3            0/1 indicators for region 1, 2, 3
    win_ac, cen_ac       0/1 air-conditioning type indicators
    totexp_nh            totexpcq + totexppq minus owned-dwelling sum
    ln_* and r_* fields  logs and deflated versions of totpval and
                         totexp_nh. A log is set to 0 when its
                         argument is not greater than 0.
    yr81                 1 when srv_year is 81

  Fix in this version: rooms and bathrm are capped at 15 only when
  they are non-missing. MIN() ignores missing arguments, so the
  earlier unguarded MIN(rooms,15) turned a missing count into 15.
  The totpval cap in this step already used the same guard.
------------------------------------------------------------------*/
DATA famstep8081;
SET cexdat(rename=(propvalu=totpval));

/* Split newid into everything but the last digit, and the last digit. */
lft5=floor(newid/10);
rt1=newid-lft5*10;

/* Family-type indicators. A missing input yields 0. */
IF sex_ref=2 AND fam_type=8 THEN swnk=1;
ELSE swnk=0;
IF sex_ref=2 AND fam_type=7 THEN sm=1;
ELSE sm=0;
IF 2 LE fam_type LE 4 AND perslt18 GT 0 THEN mm=1;
ELSE mm=0;

/* SUM(OF ...) skips missing arguments. rent is missing only when both
   inputs are missing. */
rent=SUM(OF rendwepq rendwecq);
urban=2-bls_urbn;

/* Cap at 15, leaving missing values missing. */
IF rooms NE . THEN rooms=MIN(rooms,15);
IF bathrm NE . THEN bathrm=MIN(bathrm,15);

IF region=1 then reg1=1;
   ELSE reg1=0;
IF region=2 then reg2=1;
   ELSE reg2=0;
IF region=3 then reg3=1;
   ELSE reg3=0;
smsa=2-smsastat;

   /* Both default to 0 and are set only when ac=1. */
   win_ac=0;
   cen_ac=0;
   IF ac=1 AND ac_type=1 THEN cen_ac=1;
   IF ac=1 AND ac_type=2 THEN win_ac=1;

/* Cap the property value at 158000, again leaving missing as missing. */
IF totpval NE . THEN totpval=MIN(totpval,158000);

/* The + operator propagates missing, so totexp_nh is missing when
   either totexpcq or totexppq is missing. */
totexp_nh=totexpcq+totexppq-SUM(OF owndwepq owndwecq);

/* Logs. A non-positive or missing argument gives 0, not missing. */
IF totpval GT 0 THEN ln_totpval=LOG(totpval);
   ELSE ln_totpval=0;
IF totexp_nh GT 0 THEN ln_totexp_nh=LOG(totexp_nh);
   ELSE ln_totexp_nh=0;

/* 80x quarters: the r_ values equal the nominal values. */
IF 801 LE qyear LE 804 THEN DO;
   r_totpval=totpval;
   IF r_totpval GT 0 THEN r_ln_totpval=LOG(r_totpval);
     ELSE r_ln_totpval=0;
   r_totexp_nh=totexp_nh;
   IF r_totexp_nh GT 0 THEN r_ln_totexp_nh=LOG(r_totexp_nh);
     ELSE r_ln_totexp_nh=0;
END;
/* 81x quarters: convert to 1980 prices using cpi-u-rs (factor 0.914).
   Any qyear outside both ranges leaves the r_ variables missing. */
IF 811 LE qyear LE 815 THEN DO;
   r_totpval=totpval*0.914;
   IF r_totpval GT 0 THEN r_ln_totpval=LOG(r_totpval);
     ELSE r_ln_totpval=0;
   r_totexp_nh=totexp_nh*0.914;
   IF r_totexp_nh GT 0 THEN r_ln_totexp_nh=LOG(r_totexp_nh);
     ELSE r_ln_totexp_nh=0;
END;

IF srv_year=81 THEN yr81=1;
   ELSE yr81=0;

RUN;

*******************************************************************
IMPUTE RENTEQ FOR 80-81
*******************************************************************;
*%gunzipsq(~/jim/data,famstep1,all,qyear<845); 

/* The merge below needs alldat.famstep1 in newid order. There is no
   OUT= option, so the permanent data set is rewritten in sorted order. */
PROC SORT DATA=alldat.famstep1;
   BY newid;
RUN;

/* regtemp: estimation sample for the renteq regression. It is built
   from the 84 per-newid home totals (out_h) joined to the matching
   alldat.famstep1 records. */
DATA regtemp;
   MERGE out_h(IN=has_home KEEP=newid totpval n_hous)
         alldat.famstep1(KEEP=newid renteq sex_ref fam_type perslt18
                              cutenure totexpcq totexppq totexppq2
                              totexpcq2 qyear fam_size owndwepq owndwecq);
   BY newid;

   /* Keep a row only when it has a home record, its qyear is within
      842-845, and exactly three home records were summed for it.
      A further filter was left disabled by the author:
      IF 0 LT totpval LT 200000 */
   IF has_home AND (842<=qyear<=845) AND n_hous=3;

   totexp_nh = totexppq2 + totexpcq2 - SUM(OF owndwepq owndwecq);

   /* Logs default to 0 and are replaced only for positive arguments. */
   ln_totpval = 0;
   IF totpval GT 0 THEN ln_totpval = LOG(totpval);
   ln_totexp_nh = 0;
   IF totexp_nh GT 0 THEN ln_totexp_nh = LOG(totexp_nh);

   /* Convert to 1980 prices using cpi-u-rs (factor 0.79). */
   r_totpval = totpval*0.79;
   r_ln_totpval = 0;
   IF r_totpval GT 0 THEN r_ln_totpval = LOG(r_totpval);

   r_totexp_nh = totexp_nh*0.79;
   r_ln_totexp_nh = 0;
   IF r_totexp_nh GT 0 THEN r_ln_totexp_nh = LOG(r_totexp_nh);

   r_renteq = renteq*0.79;
   r_ln_renteq = 0;
   IF r_renteq GT 0 THEN r_ln_renteq = LOG(r_renteq);

   /* Family-type indicators. A comparison evaluates to 1 or 0, and a
      missing input gives 0. */
   swnk = (sex_ref=2 AND fam_type=8);
   sm   = (sex_ref=2 AND fam_type=7);
   mm   = (2 LE fam_type LE 4 AND perslt18 GT 0);
RUN;

PROC MEANS DATA=regtemp;
RUN;

/* Regress log real renteq on log real property value, log real
   non-housing expenditure, family size and the family-type indicators,
   using regtemp rows with cutenure 1, 2 or 3. The coefficients are
   written to param. */
PROC REG DATA=regtemp OUTEST=param;
   WHERE cutenure IN (1,2,3);
   MODEL r_ln_renteq = r_ln_totpval r_ln_totexp_nh fam_size sm mm swnk;
RUN;
QUIT;

**************************************************************
This generates the predicted values of renteq using the
parameters generated from the regression above
**************************************************************;
/* Because of the ID statement, predhome carries only newid and the
   score. The final merge refers to that score variable as model1. */
PROC SCORE DATA=famstep8081 SCORE=param TYPE=PARMS OUT=predhome;
   ID newid;
   VAR r_ln_totpval r_ln_totexp_nh fam_size sm mm swnk;
RUN;

PROC MEANS DATA=predhome;
RUN;

/* param is recreated by the rent regression later. regtemp is no
   longer needed. */
PROC DATASETS LIBRARY=work;
   DELETE param regtemp;
RUN;
QUIT;

/* Put the predictions in newid order for the final merge. */
PROC SORT DATA=predhome;
   BY newid;
RUN;

*******************************************************************
IMPUTE RENT FOR LOW RENT UNITS
*******************************************************************;

/* Estimation sample: famstep8081 rows with cutenure=4 and rent above
   25. A missing rent fails the comparison and is dropped. */
DATA temprent;
   SET famstep8081;

   IF NOT (cutenure=4 AND rent GT 25) THEN DELETE;

   ln_rent = LOG(rent);
RUN;

/* Log-rent regression on dwelling and location characteristics. The
   coefficients are written to param. */
PROC REG DATA=temprent OUTEST=param;
   MODEL ln_rent = rooms reg1 reg2 reg3 win_ac cen_ac urban smsa bathrm yr81;
RUN;
QUIT;

/* Score every famstep8081 record. predrent holds newid and the score
   variable, which the final merge reads as model1. */
PROC SCORE DATA=famstep8081 SCORE=param TYPE=PARMS OUT=predrent;
   ID newid;
   VAR rooms reg1 reg2 reg3 win_ac cen_ac urban smsa bathrm yr81;
RUN;

PROC MEANS DATA=predrent;
RUN;

PROC SORT DATA=predrent;
   BY newid;
RUN;

*****************************************************************
Merge the predicted renteq and predicted rent back onto the
80-81 master dataset
*****************************************************************;

/* Three inputs are merged on newid:
     famstep8081 - master records, existing renteq kept as renteq_orig
     predhome    - renteq regression score, renamed to renteq_pred
     predrent    - rent regression score, left under the name model1
   Only newids present in famstep8081 are written out. */
DATA alldat.famstep8081;
   MERGE famstep8081(IN=in_master RENAME=(renteq=renteq_orig))
         predhome(RENAME=(model1=renteq_pred))
         predrent;
   BY newid;
   IF in_master;

   /* renteq: exponentiated prediction for cutenure 1, 2 or 3, and 0
      for any other cutenure. The 81x quarters are scaled by 1.09. A
      qyear outside 801-804 and 811-815 leaves renteq missing.
      NOTE(review): 1.09 is close to, but not exactly, 1/0.914, the
      deflator applied to the 81x quarters when famstep8081 was built.
      Confirm the rounding is intended. */
   IF cutenure IN (1,2,3) THEN DO;
      IF 801 LE qyear LE 804 THEN renteq = EXP(renteq_pred);
      ELSE IF 811 LE qyear LE 815 THEN renteq = EXP(renteq_pred)*1.09;
   END;
   ELSE IF (801 LE qyear LE 804) OR (811 LE qyear LE 815) THEN renteq = 0;

   /* rent2: reported rent, except that cutenure=4 rows reporting zero
      rent get 75 percent of the exponentiated rent prediction when
      that prediction (model1) is positive. */
   rent2 = rent;
   IF cutenure=4 AND rent=0 AND model1 GT 0 THEN rent2 = 0.75*EXP(model1);
RUN;

PROC MEANS DATA=alldat.famstep8081;
RUN;

/* There is no OUT= option, so the permanent data set is left sorted by
   qyear. */
PROC SORT DATA=alldat.famstep8081;
   BY qyear;
RUN;

PROC MEANS DATA=alldat.famstep8081;
   BY qyear;
RUN;

*%gzipsq(work,famstep8081,~/jim/data); 

